About the dataset: The original dataset was collected by Sam using software from the National Cancer Institute’s Surveillance, Epidemiology, and End Results Program (known as the SEER*Stat Software) to access the National Vital Statistics System, which is maintained by the National Center for Health Statistics.

# Load the raw export described above: a tab-delimited text file with no
# header row. Column names are attached by hand on the next line.
# NOTE(review): this chunk uses %>% and mutate(); it relies on dplyr being
# attached earlier -- no library() call is visible in this excerpt.
dat <- read.table("/Users/corinneriddell/Dropbox/BlackWhiteGap/Data/deaths-cause-6913.txt",
                  header = FALSE, sep = "\t")
names(dat) <- c("State", "Age", "Sex", "Race", "COD", "Year", "Crude_Rate", "Count", "Population")

# Every categorical column arrives as a zero-based integer code. Attach
# readable labels in new "*2" columns and keep the raw codes untouched.
# Any code outside the listed levels becomes NA in the labelled column.
dat <- dat %>%
  mutate(
    # State codes 0..50 map to 50 states plus Washington DC.
    State2 = factor(State, levels = 0:50,
                    labels = c("Alabama", "Alaska", "Arizona", "Arkansas", "California",
                               "Colorado", "Connecticut", "Delaware", "Washington DC",
                               "Florida", "Georgia", "Hawaii", "Idaho", "Illinois",
                               "Indiana", "Iowa", "Kansas", "Kentucky", "Louisiana",
                               "Maine", "Maryland", "Massachusetts", "Michigan",
                               "Minnesota", "Mississippi", "Missouri", "Montana",
                               "Nebraska", "Nevada", "New Hampshire", "New Jersey",
                               "New Mexico", "New York", "North Carolina", "North Dakota",
                               "Ohio", "Oklahoma", "Oregon", "Pennsylvania",
                               "Rhode Island", "South Carolina", "South Dakota",
                               "Tennessee", "Texas", "Utah", "Vermont", "Virginia",
                               "Washington", "West Virginia", "Wisconsin", "Wyoming")),
    # Age groups are an ordered factor so that sorting follows age, with the
    # "Unknown" category placed last.
    Age2 = factor(Age, levels = 0:19,
                  labels = c("<1 year", "1-4 years", "5-9 years", "10-14 years",
                             "15-19 years", "20-24 years", "25-29 years", "30-34 years",
                             "35-39 years", "40-44 years", "45-49 years", "50-54 years",
                             "55-59 years", "60-64 years", "65-69 years", "70-74 years",
                             "75-79 years", "80-84 years", "85+ years", "Unknown"),
                  ordered = TRUE),
    Sex2 = factor(Sex, levels = c(0, 1), labels = c("Male", "Female")),
    Race2 = factor(Race, levels = c(0, 1), labels = c("White", "Black")),
    COD2 = factor(COD, levels = 0:5,
                  labels = c("Cardiovascular", "Cancers", "Communicable",
                             "Non-communicable", "Injuries", "All other causes")),
    # Year codes 0..44 are labelled with calendar years 1969..2013.
    Year2 = factor(Year, levels = 0:44, labels = as.character(1969:2013)),
    # Combined keys used further down to identify strata.
    RaceSex = interaction(Race2, Sex2),
    StateYearRaceSex = interaction(State2, Year2, Race2, Sex2)
  )

Some rows have Population == 0; we explore these below and record what we find. Most of them correspond to rows with an Unknown age group, which we cannot use in our analysis, so I remove them. This leaves 654 rows with a Population of 0.

# How many cells have a population of zero? (NA is tabulated explicitly.)
table(dat$Population == 0, useNA = "always")
## 
##   FALSE    TRUE    <NA> 
## 1045866   55734       0
#55734 cells have Populations of 0.

# Among the zero-population cells, is the death count also zero?
table(dat$Count[dat$Population == 0] == 0, useNA = "always")
## 
##  TRUE  <NA> 
## 55708    26
#Of these, all have Counts of 0, except for 26 with counts of NA

# Cross-tabulate "age group is Unknown" against "population is zero".
table(dat$Age2 == "Unknown", dat$Population == 0, useNA = "always")
##        
##           FALSE    TRUE    <NA>
##   FALSE 1045866     654       0
##   TRUE        0   55080       0
##   <NA>        0       0       0
#The vast majority of cells with populations of 0 have unknown age groups

#remove this data from the data we analyze:
# which() keeps only rows where the comparison is TRUE. Indexing with the raw
# logical vector would instead insert an all-NA row for every NA in Age2.
# (The table above shows no NA ages in this extract, so the result is the same.)
dat.clean <- dat[which(dat$Age2 != "Unknown"), ]

table(dat.clean$Population == 0, useNA = "always")
## 
##   FALSE    TRUE    <NA> 
## 1045866     654       0
#654 cells have Populations of 0 now.

table(dat.clean$Count[dat.clean$Population == 0] == 0, useNA = "always")
## 
## TRUE <NA> 
##  628   26
summary(dat.clean$Count[dat.clean$Population == 0])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##       0       0       0       0       0       0      26
#628 are equal to 0 and 26 are equal to NA

#replace the NA Counts with 0 for the 26 cells with Populations of 0.
dat.clean$Count[is.na(dat.clean$Count) & dat.clean$Population == 0] <- 0
summary(dat.clean$Count[dat.clean$Population == 0])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0       0       0       0       0       0
#now the Counts should only equal NA when the true count is between 1 and 9.

# Crude_Rate for the zero-population cells: summary first, then a full tabulation.
summary(dat.clean$Crude_Rate[dat.clean$Population == 0])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##      NA      NA      NA     NaN      NA      NA     654
table(dat.clean$Crude_Rate[dat.clean$Population == 0], useNA = "always")
## 
## <NA> 
##  654
#whenever the population is 0, the Crude_Rate is equal to NA.
#need to think: should these be equal to 0, or NA? How will it affect the calculations?

Missingness of Count (of deaths): If there are between 1 and 9 deaths in a stratum, this information is suppressed and coded as missing. Later in this document we investigate trends in suppression/missingness to understand how it differs according to state, age, race, and sex.

Since our goal is to compute the difference in life expectancy between Blacks and Whites by stratum, we impute the suppressed death counts at their logical extremes (1 and 9, along with the midpoint 5) to explore how this affects the results.

# Fill every missing (NA) death count with one fixed value: 1, 5 or 9.
# Non-missing counts are carried over unchanged.
dat.clean <- dat.clean %>%
  mutate(Count_md1 = ifelse(is.na(Count), 1, Count),
         Count_md5 = ifelse(is.na(Count), 5, Count),
         Count_md9 = ifelse(is.na(Count), 9, Count))

# Race-specific fill, variant "b9w1": missing counts become 9 for Black rows
# and 1 for White rows.
dat.clean$Count_b9w1 <- dat.clean$Count
dat.clean$Count_b9w1[is.na(dat.clean$Count) & dat.clean$Race2 == "White"] <- 1
dat.clean$Count_b9w1[is.na(dat.clean$Count) & dat.clean$Race2 == "Black"] <- 9

# The mirror image, "b1w9": missing counts become 1 for Black rows and 9 for
# White rows.
dat.clean$Count_b1w9 <- dat.clean$Count
dat.clean$Count_b1w9[is.na(dat.clean$Count) & dat.clean$Race2 == "White"] <- 9
dat.clean$Count_b1w9[is.na(dat.clean$Count) & dat.clean$Race2 == "Black"] <- 1

#check recoding
# Bin the original and each imputed count into unit-wide intervals so the
# recoding can be cross-tabulated: (-1,0], (0,1], ..., (10,11], then one wide
# bin for everything larger.
# NOTE(review): a count above 22040 would fall outside the last bin and become
# NA -- presumably 22040 is at least the largest observed Count; confirm.
count_breaks <- c(-1, 0:11, 22040)

# Temporary check column -> the count column it bins.
grouped_sources <- c(Count_grouped      = "Count",
                     Count1_grouped     = "Count_md1",
                     Count5_grouped     = "Count_md5",
                     Count9_grouped     = "Count_md9",
                     Count_b9w1_grouped = "Count_b9w1",
                     Count_b1w9_grouped = "Count_b1w9")

for (grouped_name in names(grouped_sources)) {
  dat.clean[[grouped_name]] <- cut(dat.clean[[grouped_sources[[grouped_name]]]],
                                   count_breaks)
}

# Check the value-1 imputation: every row whose original Count is NA should
# land in the (0,1] bin of Count_md1, and all other rows should stay on the
# diagonal (same bin before and after).
table(dat.clean$Count_grouped, dat.clean$Count1_grouped, useNA = "always")
##               
##                (-1,0]  (0,1]  (1,2]  (2,3]  (3,4]  (4,5]  (5,6]  (6,7]
##   (-1,0]       228313      0      0      0      0      0      0      0
##   (0,1]             0      0      0      0      0      0      0      0
##   (1,2]             0      0      0      0      0      0      0      0
##   (2,3]             0      0      0      0      0      0      0      0
##   (3,4]             0      0      0      0      0      0      0      0
##   (4,5]             0      0      0      0      0      0      0      0
##   (5,6]             0      0      0      0      0      0      0      0
##   (6,7]             0      0      0      0      0      0      0      0
##   (7,8]             0      0      0      0      0      0      0      0
##   (8,9]             0      0      0      0      0      0      0      0
##   (9,10]            0      0      0      0      0      0      0      0
##   (10,11]           0      0      0      0      0      0      0      0
##   (11,2.2e+04]      0      0      0      0      0      0      0      0
##   <NA>              0 320202      0      0      0      0      0      0
##               
##                 (7,8]  (8,9] (9,10] (10,11] (11,2.2e+04]   <NA>
##   (-1,0]            0      0      0       0            0      0
##   (0,1]             0      0      0       0            0      0
##   (1,2]             0      0      0       0            0      0
##   (2,3]             0      0      0       0            0      0
##   (3,4]             0      0      0       0            0      0
##   (4,5]             0      0      0       0            0      0
##   (5,6]             0      0      0       0            0      0
##   (6,7]             0      0      0       0            0      0
##   (7,8]             0      0      0       0            0      0
##   (8,9]             0      0      0       0            0      0
##   (9,10]            0      0  14812       0            0      0
##   (10,11]           0      0      0   13272            0      0
##   (11,2.2e+04]      0      0      0       0       469921      0
##   <NA>              0      0      0       0            0      0
# Check the value-5 imputation: every row whose original Count is NA should
# land in the (4,5] bin of Count_md5, and all other rows should stay on the
# diagonal.
table(dat.clean$Count_grouped, dat.clean$Count5_grouped, useNA = "always")
##               
##                (-1,0]  (0,1]  (1,2]  (2,3]  (3,4]  (4,5]  (5,6]  (6,7]
##   (-1,0]       228313      0      0      0      0      0      0      0
##   (0,1]             0      0      0      0      0      0      0      0
##   (1,2]             0      0      0      0      0      0      0      0
##   (2,3]             0      0      0      0      0      0      0      0
##   (3,4]             0      0      0      0      0      0      0      0
##   (4,5]             0      0      0      0      0      0      0      0
##   (5,6]             0      0      0      0      0      0      0      0
##   (6,7]             0      0      0      0      0      0      0      0
##   (7,8]             0      0      0      0      0      0      0      0
##   (8,9]             0      0      0      0      0      0      0      0
##   (9,10]            0      0      0      0      0      0      0      0
##   (10,11]           0      0      0      0      0      0      0      0
##   (11,2.2e+04]      0      0      0      0      0      0      0      0
##   <NA>              0      0      0      0      0 320202      0      0
##               
##                 (7,8]  (8,9] (9,10] (10,11] (11,2.2e+04]   <NA>
##   (-1,0]            0      0      0       0            0      0
##   (0,1]             0      0      0       0            0      0
##   (1,2]             0      0      0       0            0      0
##   (2,3]             0      0      0       0            0      0
##   (3,4]             0      0      0       0            0      0
##   (4,5]             0      0      0       0            0      0
##   (5,6]             0      0      0       0            0      0
##   (6,7]             0      0      0       0            0      0
##   (7,8]             0      0      0       0            0      0
##   (8,9]             0      0      0       0            0      0
##   (9,10]            0      0  14812       0            0      0
##   (10,11]           0      0      0   13272            0      0
##   (11,2.2e+04]      0      0      0       0       469921      0
##   <NA>              0      0      0       0            0      0
# Check the value-9 imputation: every row whose original Count is NA should
# land in the (8,9] bin of Count_md9, and all other rows should stay on the
# diagonal.
table(dat.clean$Count_grouped, dat.clean$Count9_grouped, useNA = "always")
##               
##                (-1,0]  (0,1]  (1,2]  (2,3]  (3,4]  (4,5]  (5,6]  (6,7]
##   (-1,0]       228313      0      0      0      0      0      0      0
##   (0,1]             0      0      0      0      0      0      0      0
##   (1,2]             0      0      0      0      0      0      0      0
##   (2,3]             0      0      0      0      0      0      0      0
##   (3,4]             0      0      0      0      0      0      0      0
##   (4,5]             0      0      0      0      0      0      0      0
##   (5,6]             0      0      0      0      0      0      0      0
##   (6,7]             0      0      0      0      0      0      0      0
##   (7,8]             0      0      0      0      0      0      0      0
##   (8,9]             0      0      0      0      0      0      0      0
##   (9,10]            0      0      0      0      0      0      0      0
##   (10,11]           0      0      0      0      0      0      0      0
##   (11,2.2e+04]      0      0      0      0      0      0      0      0
##   <NA>              0      0      0      0      0      0      0      0
##               
##                 (7,8]  (8,9] (9,10] (10,11] (11,2.2e+04]   <NA>
##   (-1,0]            0      0      0       0            0      0
##   (0,1]             0      0      0       0            0      0
##   (1,2]             0      0      0       0            0      0
##   (2,3]             0      0      0       0            0      0
##   (3,4]             0      0      0       0            0      0
##   (4,5]             0      0      0       0            0      0
##   (5,6]             0      0      0       0            0      0
##   (6,7]             0      0      0       0            0      0
##   (7,8]             0      0      0       0            0      0
##   (8,9]             0      0      0       0            0      0
##   (9,10]            0      0  14812       0            0      0
##   (10,11]           0      0      0   13272            0      0
##   (11,2.2e+04]      0      0      0       0       469921      0
##   <NA>              0 320202      0       0            0      0
# Check the b9w1 imputation separately by race: originally-NA rows should fall
# in (0,1] for White and in (8,9] for Black. The third dimension of the table
# is Race2, so one panel is printed per race (plus an NA panel).
table(dat.clean$Count_grouped, dat.clean$Count_b9w1_grouped, dat.clean$Race2, useNA = "always")
## , ,  = White
## 
##               
##                (-1,0]  (0,1]  (1,2]  (2,3]  (3,4]  (4,5]  (5,6]  (6,7]
##   (-1,0]        46501      0      0      0      0      0      0      0
##   (0,1]             0      0      0      0      0      0      0      0
##   (1,2]             0      0      0      0      0      0      0      0
##   (2,3]             0      0      0      0      0      0      0      0
##   (3,4]             0      0      0      0      0      0      0      0
##   (4,5]             0      0      0      0      0      0      0      0
##   (5,6]             0      0      0      0      0      0      0      0
##   (6,7]             0      0      0      0      0      0      0      0
##   (7,8]             0      0      0      0      0      0      0      0
##   (8,9]             0      0      0      0      0      0      0      0
##   (9,10]            0      0      0      0      0      0      0      0
##   (10,11]           0      0      0      0      0      0      0      0
##   (11,2.2e+04]      0      0      0      0      0      0      0      0
##   <NA>              0 145697      0      0      0      0      0      0
##               
##                 (7,8]  (8,9] (9,10] (10,11] (11,2.2e+04]   <NA>
##   (-1,0]            0      0      0       0            0      0
##   (0,1]             0      0      0       0            0      0
##   (1,2]             0      0      0       0            0      0
##   (2,3]             0      0      0       0            0      0
##   (3,4]             0      0      0       0            0      0
##   (4,5]             0      0      0       0            0      0
##   (5,6]             0      0      0       0            0      0
##   (6,7]             0      0      0       0            0      0
##   (7,8]             0      0      0       0            0      0
##   (8,9]             0      0      0       0            0      0
##   (9,10]            0      0   7751       0            0      0
##   (10,11]           0      0      0    6920            0      0
##   (11,2.2e+04]      0      0      0       0       316391      0
##   <NA>              0      0      0       0            0      0
## 
## , ,  = Black
## 
##               
##                (-1,0]  (0,1]  (1,2]  (2,3]  (3,4]  (4,5]  (5,6]  (6,7]
##   (-1,0]       181812      0      0      0      0      0      0      0
##   (0,1]             0      0      0      0      0      0      0      0
##   (1,2]             0      0      0      0      0      0      0      0
##   (2,3]             0      0      0      0      0      0      0      0
##   (3,4]             0      0      0      0      0      0      0      0
##   (4,5]             0      0      0      0      0      0      0      0
##   (5,6]             0      0      0      0      0      0      0      0
##   (6,7]             0      0      0      0      0      0      0      0
##   (7,8]             0      0      0      0      0      0      0      0
##   (8,9]             0      0      0      0      0      0      0      0
##   (9,10]            0      0      0      0      0      0      0      0
##   (10,11]           0      0      0      0      0      0      0      0
##   (11,2.2e+04]      0      0      0      0      0      0      0      0
##   <NA>              0      0      0      0      0      0      0      0
##               
##                 (7,8]  (8,9] (9,10] (10,11] (11,2.2e+04]   <NA>
##   (-1,0]            0      0      0       0            0      0
##   (0,1]             0      0      0       0            0      0
##   (1,2]             0      0      0       0            0      0
##   (2,3]             0      0      0       0            0      0
##   (3,4]             0      0      0       0            0      0
##   (4,5]             0      0      0       0            0      0
##   (5,6]             0      0      0       0            0      0
##   (6,7]             0      0      0       0            0      0
##   (7,8]             0      0      0       0            0      0
##   (8,9]             0      0      0       0            0      0
##   (9,10]            0      0   7061       0            0      0
##   (10,11]           0      0      0    6352            0      0
##   (11,2.2e+04]      0      0      0       0       153530      0
##   <NA>              0 174505      0       0            0      0
## 
## , ,  = NA
## 
##               
##                (-1,0]  (0,1]  (1,2]  (2,3]  (3,4]  (4,5]  (5,6]  (6,7]
##   (-1,0]            0      0      0      0      0      0      0      0
##   (0,1]             0      0      0      0      0      0      0      0
##   (1,2]             0      0      0      0      0      0      0      0
##   (2,3]             0      0      0      0      0      0      0      0
##   (3,4]             0      0      0      0      0      0      0      0
##   (4,5]             0      0      0      0      0      0      0      0
##   (5,6]             0      0      0      0      0      0      0      0
##   (6,7]             0      0      0      0      0      0      0      0
##   (7,8]             0      0      0      0      0      0      0      0
##   (8,9]             0      0      0      0      0      0      0      0
##   (9,10]            0      0      0      0      0      0      0      0
##   (10,11]           0      0      0      0      0      0      0      0
##   (11,2.2e+04]      0      0      0      0      0      0      0      0
##   <NA>              0      0      0      0      0      0      0      0
##               
##                 (7,8]  (8,9] (9,10] (10,11] (11,2.2e+04]   <NA>
##   (-1,0]            0      0      0       0            0      0
##   (0,1]             0      0      0       0            0      0
##   (1,2]             0      0      0       0            0      0
##   (2,3]             0      0      0       0            0      0
##   (3,4]             0      0      0       0            0      0
##   (4,5]             0      0      0       0            0      0
##   (5,6]             0      0      0       0            0      0
##   (6,7]             0      0      0       0            0      0
##   (7,8]             0      0      0       0            0      0
##   (8,9]             0      0      0       0            0      0
##   (9,10]            0      0      0       0            0      0
##   (10,11]           0      0      0       0            0      0
##   (11,2.2e+04]      0      0      0       0            0      0
##   <NA>              0      0      0       0            0      0
# Check the b1w9 imputation separately by race: originally-NA rows should fall
# in (8,9] for White and in (0,1] for Black. The third dimension of the table
# is Race2, so one panel is printed per race (plus an NA panel).
table(dat.clean$Count_grouped, dat.clean$Count_b1w9_grouped, dat.clean$Race2, useNA = "always")
## , ,  = White
## 
##               
##                (-1,0]  (0,1]  (1,2]  (2,3]  (3,4]  (4,5]  (5,6]  (6,7]
##   (-1,0]        46501      0      0      0      0      0      0      0
##   (0,1]             0      0      0      0      0      0      0      0
##   (1,2]             0      0      0      0      0      0      0      0
##   (2,3]             0      0      0      0      0      0      0      0
##   (3,4]             0      0      0      0      0      0      0      0
##   (4,5]             0      0      0      0      0      0      0      0
##   (5,6]             0      0      0      0      0      0      0      0
##   (6,7]             0      0      0      0      0      0      0      0
##   (7,8]             0      0      0      0      0      0      0      0
##   (8,9]             0      0      0      0      0      0      0      0
##   (9,10]            0      0      0      0      0      0      0      0
##   (10,11]           0      0      0      0      0      0      0      0
##   (11,2.2e+04]      0      0      0      0      0      0      0      0
##   <NA>              0      0      0      0      0      0      0      0
##               
##                 (7,8]  (8,9] (9,10] (10,11] (11,2.2e+04]   <NA>
##   (-1,0]            0      0      0       0            0      0
##   (0,1]             0      0      0       0            0      0
##   (1,2]             0      0      0       0            0      0
##   (2,3]             0      0      0       0            0      0
##   (3,4]             0      0      0       0            0      0
##   (4,5]             0      0      0       0            0      0
##   (5,6]             0      0      0       0            0      0
##   (6,7]             0      0      0       0            0      0
##   (7,8]             0      0      0       0            0      0
##   (8,9]             0      0      0       0            0      0
##   (9,10]            0      0   7751       0            0      0
##   (10,11]           0      0      0    6920            0      0
##   (11,2.2e+04]      0      0      0       0       316391      0
##   <NA>              0 145697      0       0            0      0
## 
## , ,  = Black
## 
##               
##                (-1,0]  (0,1]  (1,2]  (2,3]  (3,4]  (4,5]  (5,6]  (6,7]
##   (-1,0]       181812      0      0      0      0      0      0      0
##   (0,1]             0      0      0      0      0      0      0      0
##   (1,2]             0      0      0      0      0      0      0      0
##   (2,3]             0      0      0      0      0      0      0      0
##   (3,4]             0      0      0      0      0      0      0      0
##   (4,5]             0      0      0      0      0      0      0      0
##   (5,6]             0      0      0      0      0      0      0      0
##   (6,7]             0      0      0      0      0      0      0      0
##   (7,8]             0      0      0      0      0      0      0      0
##   (8,9]             0      0      0      0      0      0      0      0
##   (9,10]            0      0      0      0      0      0      0      0
##   (10,11]           0      0      0      0      0      0      0      0
##   (11,2.2e+04]      0      0      0      0      0      0      0      0
##   <NA>              0 174505      0      0      0      0      0      0
##               
##                 (7,8]  (8,9] (9,10] (10,11] (11,2.2e+04]   <NA>
##   (-1,0]            0      0      0       0            0      0
##   (0,1]             0      0      0       0            0      0
##   (1,2]             0      0      0       0            0      0
##   (2,3]             0      0      0       0            0      0
##   (3,4]             0      0      0       0            0      0
##   (4,5]             0      0      0       0            0      0
##   (5,6]             0      0      0       0            0      0
##   (6,7]             0      0      0       0            0      0
##   (7,8]             0      0      0       0            0      0
##   (8,9]             0      0      0       0            0      0
##   (9,10]            0      0   7061       0            0      0
##   (10,11]           0      0      0    6352            0      0
##   (11,2.2e+04]      0      0      0       0       153530      0
##   <NA>              0      0      0       0            0      0
## 
## , ,  = NA
## 
##               
##                (-1,0]  (0,1]  (1,2]  (2,3]  (3,4]  (4,5]  (5,6]  (6,7]
##   (-1,0]            0      0      0      0      0      0      0      0
##   (0,1]             0      0      0      0      0      0      0      0
##   (1,2]             0      0      0      0      0      0      0      0
##   (2,3]             0      0      0      0      0      0      0      0
##   (3,4]             0      0      0      0      0      0      0      0
##   (4,5]             0      0      0      0      0      0      0      0
##   (5,6]             0      0      0      0      0      0      0      0
##   (6,7]             0      0      0      0      0      0      0      0
##   (7,8]             0      0      0      0      0      0      0      0
##   (8,9]             0      0      0      0      0      0      0      0
##   (9,10]            0      0      0      0      0      0      0      0
##   (10,11]           0      0      0      0      0      0      0      0
##   (11,2.2e+04]      0      0      0      0      0      0      0      0
##   <NA>              0      0      0      0      0      0      0      0
##               
##                 (7,8]  (8,9] (9,10] (10,11] (11,2.2e+04]   <NA>
##   (-1,0]            0      0      0       0            0      0
##   (0,1]             0      0      0       0            0      0
##   (1,2]             0      0      0       0            0      0
##   (2,3]             0      0      0       0            0      0
##   (3,4]             0      0      0       0            0      0
##   (4,5]             0      0      0       0            0      0
##   (5,6]             0      0      0       0            0      0
##   (6,7]             0      0      0       0            0      0
##   (7,8]             0      0      0       0            0      0
##   (8,9]             0      0      0       0            0      0
##   (9,10]            0      0      0       0            0      0
##   (10,11]           0      0      0       0            0      0
##   (11,2.2e+04]      0      0      0       0            0      0
##   <NA>              0      0      0       0            0      0
# The binned "*_grouped" columns were only needed for the recoding checks;
# keep every column whose name is not one of them.
dat.clean <- dat.clean[, is.na(match(names(dat.clean),
                                     c("Count_grouped", "Count1_grouped",
                                       "Count5_grouped", "Count9_grouped",
                                       "Count_b9w1_grouped", "Count_b1w9_grouped")))]

To calculate life expectancy, we have to aggregate the data across the causes of death.

# Collapse the cause-of-death dimension: one output row per
# State2 x Year2 x RaceSex x Age2 cell, with the imputed death counts summed
# over all causes. Population, Race2, Sex2 and StateYearRaceSex are taken from
# the first row of each cell.
# NOTE(review): taking first(Population) assumes the population is identical
# across the cause-of-death rows of a cell -- confirm against the export.
# The result keeps whatever grouping summarise() leaves in place; no
# ungroup() is applied here.
dat2 <- dat.clean %>%
  arrange(State2, RaceSex, Year2, Age2) %>%
  group_by(State2, Year2, RaceSex, Age2) %>%
  summarise(
    total_deaths1    = sum(Count_md1),
    total_deaths5    = sum(Count_md5),
    total_deaths9    = sum(Count_md9),
    total_deaths_b9  = sum(Count_b9w1),
    total_deaths_w9  = sum(Count_b1w9),
    Population       = first(Population),
    Race2            = first(Race2),
    Sex2             = first(Sex2),
    StateYearRaceSex = first(StateYearRaceSex)
  )

We implemented the calculations performed in the technical supplement found here [http://www.lho.org.uk/Download/Public/7656/1/tech_supp_3.pdf], also saved in our dropbox.

Let’s make sure we can do the calculation all the way through for a few of the groupings, including a grouping with a stratum with a zero population count.

# Width of each age interval in years: 1 for "<1 year", 4 for "1-4 years",
# and 5 for every other age group. %in% never returns NA, so a missing age
# falls through to 5, matching the default.
dat2$numYrsInt <- ifelse(dat2$Age2 %in% "<1 year", 1,
                         ifelse(dat2$Age2 %in% "1-4 years", 4, 5))

# Small vectorised helpers shared by the five imputation scenarios
# (suffixes 1, 5, 9, b9, w9). Each one yields NA where the stratum has a
# population of zero.

# Deaths per person.
rate_per_person <- function(deaths, pop) {
  ifelse(pop != 0, deaths / pop, NA)
}

# Probability of dying within an age interval: n*m / (1 + n*(1 - a)*m), with
# m the death rate, n the interval width in years, and a the
# Ave_Lived_by_Died value for the interval.
interval_death_prob <- function(m, n, a, pop) {
  ifelse(pop != 0, n * m / (1 + n * (1 - a) * m), NA)
}

# Complement of the probability of dying.
interval_surv_prob <- function(q, pop) {
  ifelse(pop != 0, 1 - q, NA)
}

# Keep `value` on the "<1 year" rows only; every other age row gets NA.
first_age_only <- function(age, value) {
  ifelse(age == "<1 year", value, NA)
}

dat2 <- mutate(
  dat2,
  Death_Rate1  = rate_per_person(total_deaths1, Population),
  Death_Rate5  = rate_per_person(total_deaths5, Population),
  Death_Rate9  = rate_per_person(total_deaths9, Population),
  Death_Rateb9 = rate_per_person(total_deaths_b9, Population),
  Death_Ratew9 = rate_per_person(total_deaths_w9, Population),
  # 0.1 for the "<1 year" group, 0.5 for all other age groups.
  Ave_Lived_by_Died = ifelse(Age2 == "<1 year", 0.1, 0.5),
  Prob_Dying1  = interval_death_prob(Death_Rate1, numYrsInt, Ave_Lived_by_Died, Population),
  Prob_Dying5  = interval_death_prob(Death_Rate5, numYrsInt, Ave_Lived_by_Died, Population),
  Prob_Dying9  = interval_death_prob(Death_Rate9, numYrsInt, Ave_Lived_by_Died, Population),
  Prob_Dyingb9 = interval_death_prob(Death_Rateb9, numYrsInt, Ave_Lived_by_Died, Population),
  Prob_Dyingw9 = interval_death_prob(Death_Ratew9, numYrsInt, Ave_Lived_by_Died, Population),
  Prob_Surv1  = interval_surv_prob(Prob_Dying1, Population),
  Prob_Surv5  = interval_surv_prob(Prob_Dying5, Population),
  Prob_Surv9  = interval_surv_prob(Prob_Dying9, Population),
  Prob_Survb9 = interval_surv_prob(Prob_Dyingb9, Population),
  Prob_Survw9 = interval_surv_prob(Prob_Dyingw9, Population),
  # Seed the life table: a starting cohort of 1,000,000 on the "<1 year" row.
  # The remaining rows are left NA here and are filled in later, row by row.
  alive_at_start_int1  = first_age_only(Age2, 1000000),
  alive_at_start_int5  = first_age_only(Age2, 1000000),
  alive_at_start_int9  = first_age_only(Age2, 1000000),
  alive_at_start_intb9 = first_age_only(Age2, 1000000),
  alive_at_start_intw9 = first_age_only(Age2, 1000000),
  # Deaths in the first interval = starting cohort * probability of dying.
  deaths_during_int1  = first_age_only(Age2, alive_at_start_int1 * Prob_Dying1),
  deaths_during_int5  = first_age_only(Age2, alive_at_start_int5 * Prob_Dying5),
  deaths_during_int9  = first_age_only(Age2, alive_at_start_int9 * Prob_Dying9),
  deaths_during_intb9 = first_age_only(Age2, alive_at_start_intb9 * Prob_Dyingb9),
  deaths_during_intw9 = first_age_only(Age2, alive_at_start_intw9 * Prob_Dyingw9)
)
# Peek at the first five State.Year.Race.Sex strata labels. These are the
# strata used for the trial run of the life-table loop.
levels(dat2$StateYearRaceSex)[1:5]
## [1] "Alabama.1969.White.Male"    "Alaska.1969.White.Male"    
## [3] "Arizona.1969.White.Male"    "Arkansas.1969.White.Male"  
## [5] "California.1969.White.Male"
# Distribution of Population over all rows belonging to those five strata.
summary(dat2$Population[dat2$StateYearRaceSex %in% levels(dat2$StateYearRaceSex)[1:5]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     143   12660   42320  121900   82940  881100
# Fill in the life-table columns for strata 1-5 and time the run.
#
# For every stratum (rows sharing a StateYearRaceSex level, taken in their
# data-frame order) and every life-table variant (suffixes 1, 5, 9, b9, w9):
#   alive_at_start_int[row] = alive_at_start_int[row - 1] - deaths_during_int[row - 1]
#   deaths_during_int[row]  = alive_at_start_int[row] * Prob_Dying[row]
# except in the stratum's last row, where everyone alive at the start dies.
# Row 1 of each stratum is taken as already present in dat2.
#
# The row indices of each stratum are looked up once via split(), and each
# column is pulled out as a plain vector, updated, and written back once.
# The previous version re-evaluated `dat2$StateYearRaceSex == x` over the whole
# data frame for every single cell, which is what made it so slow. The order of
# the arithmetic is unchanged, so the values produced are the same.
# Strata with fewer than two rows are skipped: 2:length(...) would count
# backwards (2, 1, ...) for them. Helper variables stay inside local().
timer1 <- system.time(
  dat2 <- local({
    filled <- dat2
    stratum_rows <- split(seq_len(nrow(filled)), filled$StateYearRaceSex)
    strata <- levels(filled$StateYearRaceSex)[1:5]

    for (suffix in c("1", "5", "9", "b9", "w9")) {
      alive_col <- paste0("alive_at_start_int", suffix)
      deaths_col <- paste0("deaths_during_int", suffix)
      alive <- filled[[alive_col]]
      deaths <- filled[[deaths_col]]
      prob_dying <- filled[[paste0("Prob_Dying", suffix)]]

      for (stratum in strata) {
        idx <- stratum_rows[[stratum]]
        n_rows <- length(idx)
        if (n_rows < 2L) {
          next
        }
        a <- alive[idx]
        d <- deaths[idx]
        p <- prob_dying[idx]
        for (row in 2:n_rows) {
          a[row] <- a[row - 1] - d[row - 1]
          # everyone dies in the last (85+) grouping
          d[row] <- if (row != n_rows) a[row] * p[row] else a[row]
        }
        alive[idx] <- a
        deaths[idx] <- d
      }

      filled[[alive_col]] <- alive
      filled[[deaths_col]] <- deaths
    }
    filled
  })
)

# NOTE: the timing recorded below was measured with the earlier implementation
# that re-scanned the whole data frame for every cell.
timer1 # 25 seconds for 5 -- guess 13 hrs for all the strata
##    user  system elapsed 
##  19.540   5.984  26.170
# Fill in the life-table columns for strata 6-1000 and time the run.
#
# For every stratum (rows sharing a StateYearRaceSex level, taken in their
# data-frame order) and every life-table variant (suffixes 1, 5, 9, b9, w9):
#   alive_at_start_int[row] = alive_at_start_int[row - 1] - deaths_during_int[row - 1]
#   deaths_during_int[row]  = alive_at_start_int[row] * Prob_Dying[row]
# except in the stratum's last row, where everyone alive at the start dies.
# Row 1 of each stratum is taken as already present in dat2.
#
# Stratum row indices are looked up once via split() and each column is
# updated as a plain vector and written back once, rather than re-evaluating
# `dat2$StateYearRaceSex == x` over the whole data frame for every cell.
# The order of the arithmetic is unchanged, so the values are the same.
# Strata with fewer than two rows are skipped: 2:length(...) would count
# backwards for them. Helper variables stay inside local().
timer3 <- system.time(
  dat2 <- local({
    filled <- dat2
    stratum_rows <- split(seq_len(nrow(filled)), filled$StateYearRaceSex)
    strata <- levels(filled$StateYearRaceSex)[6:1000]

    for (suffix in c("1", "5", "9", "b9", "w9")) {
      alive_col <- paste0("alive_at_start_int", suffix)
      deaths_col <- paste0("deaths_during_int", suffix)
      alive <- filled[[alive_col]]
      deaths <- filled[[deaths_col]]
      prob_dying <- filled[[paste0("Prob_Dying", suffix)]]

      for (stratum in strata) {
        idx <- stratum_rows[[stratum]]
        n_rows <- length(idx)
        if (n_rows < 2L) {
          next
        }
        a <- alive[idx]
        d <- deaths[idx]
        p <- prob_dying[idx]
        for (row in 2:n_rows) {
          a[row] <- a[row - 1] - d[row - 1]
          # everyone dies in the last (85+) grouping
          d[row] <- if (row != n_rows) a[row] * p[row] else a[row]
        }
        alive[idx] <- a
        deaths[idx] <- d
      }

      filled[[alive_col]] <- alive
      filled[[deaths_col]] <- deaths
    }
    filled
  })
)

# NOTE: the timing recorded below was measured with the earlier implementation
# that re-scanned the whole data frame for every cell.
timer3
##     user   system  elapsed 
## 3680.117 1121.939 4818.074
# Person-years lived within each age interval, then life expectancy.
#
# num_PY_int*: survivors of the interval contribute numYrsInt years each, and
#   those who die contribute Ave_Lived_by_Died * numYrsInt years each. For the
#   open-ended "85+ years" row the value is alive_at_start / Death_Rate.
#   NOTE(review): a zero Death_Rate in the 85+ row gives Inf or NaN here.
# tot_PY_after*: within each stratum, running total of num_PY_int* from the
#   oldest age group downwards (hence the descending sort before cumsum).
# life_exp*: tot_PY_after* divided by the number alive at the interval start.
# The rows are put back into ascending age order at the end.
dat2 <- dat2 %>%
  mutate(
    num_PY_int1 = ifelse(
      Age2 == "85+ years",
      alive_at_start_int1 / Death_Rate1,
      (alive_at_start_int1 - deaths_during_int1) * numYrsInt +
        (deaths_during_int1 * Ave_Lived_by_Died * numYrsInt)
    ),
    num_PY_int5 = ifelse(
      Age2 == "85+ years",
      alive_at_start_int5 / Death_Rate5,
      (alive_at_start_int5 - deaths_during_int5) * numYrsInt +
        (deaths_during_int5 * Ave_Lived_by_Died * numYrsInt)
    ),
    num_PY_int9 = ifelse(
      Age2 == "85+ years",
      alive_at_start_int9 / Death_Rate9,
      (alive_at_start_int9 - deaths_during_int9) * numYrsInt +
        (deaths_during_int9 * Ave_Lived_by_Died * numYrsInt)
    ),
    num_PY_intb9 = ifelse(
      Age2 == "85+ years",
      alive_at_start_intb9 / Death_Rateb9,
      (alive_at_start_intb9 - deaths_during_intb9) * numYrsInt +
        (deaths_during_intb9 * Ave_Lived_by_Died * numYrsInt)
    ),
    num_PY_intw9 = ifelse(
      Age2 == "85+ years",
      alive_at_start_intw9 / Death_Ratew9,
      (alive_at_start_intw9 - deaths_during_intw9) * numYrsInt +
        (deaths_during_intw9 * Ave_Lived_by_Died * numYrsInt)
    )
  ) %>%
  group_by(StateYearRaceSex) %>%
  arrange(desc(Age2)) %>%
  mutate(
    tot_PY_after1 = cumsum(num_PY_int1),
    tot_PY_after5 = cumsum(num_PY_int5),
    tot_PY_after9 = cumsum(num_PY_int9),
    tot_PY_afterb9 = cumsum(num_PY_intb9),
    tot_PY_afterw9 = cumsum(num_PY_intw9),
    life_exp1 = tot_PY_after1 / alive_at_start_int1,
    life_exp5 = tot_PY_after5 / alive_at_start_int5,
    life_exp9 = tot_PY_after9 / alive_at_start_int9,
    life_expb9 = tot_PY_afterb9 / alive_at_start_intb9,
    life_expw9 = tot_PY_afterw9 / alive_at_start_intw9
  ) %>%
  group_by(StateYearRaceSex) %>%
  arrange(Age2)

# Checkpoint the whole workspace.
save.image(file = "/Users/corinneriddell/Dropbox/BlackWhiteGap/Data/Sep6_BWgap.Rdata")
# Fill in the life-table columns for strata 1001-9180 and time the run.
#
# For every stratum (rows sharing a StateYearRaceSex level, taken in their
# data-frame order) and every life-table variant (suffixes 1, 5, 9, b9, w9):
#   alive_at_start_int[row] = alive_at_start_int[row - 1] - deaths_during_int[row - 1]
#   deaths_during_int[row]  = alive_at_start_int[row] * Prob_Dying[row]
# except in the stratum's last row, where everyone alive at the start dies.
# Row 1 of each stratum is taken as already present in dat2.
#
# Stratum row indices are looked up once via split() and each column is
# updated as a plain vector and written back once, rather than re-evaluating
# `dat2$StateYearRaceSex == x` over the whole data frame for every cell.
# The order of the arithmetic is unchanged, so the values are the same.
# Strata with fewer than two rows are skipped: 2:length(...) would count
# backwards for them. Helper variables stay inside local().
timer4 <- system.time(
  dat2 <- local({
    filled <- dat2
    stratum_rows <- split(seq_len(nrow(filled)), filled$StateYearRaceSex)
    strata <- levels(filled$StateYearRaceSex)[1001:9180]

    for (suffix in c("1", "5", "9", "b9", "w9")) {
      alive_col <- paste0("alive_at_start_int", suffix)
      deaths_col <- paste0("deaths_during_int", suffix)
      alive <- filled[[alive_col]]
      deaths <- filled[[deaths_col]]
      prob_dying <- filled[[paste0("Prob_Dying", suffix)]]

      for (stratum in strata) {
        idx <- stratum_rows[[stratum]]
        n_rows <- length(idx)
        if (n_rows < 2L) {
          next
        }
        a <- alive[idx]
        d <- deaths[idx]
        p <- prob_dying[idx]
        for (row in 2:n_rows) {
          a[row] <- a[row - 1] - d[row - 1]
          # everyone dies in the last (85+) grouping
          d[row] <- if (row != n_rows) a[row] * p[row] else a[row]
        }
        alive[idx] <- a
        deaths[idx] <- d
      }

      filled[[alive_col]] <- alive
      filled[[deaths_col]] <- deaths
    }
    filled
  })
)

# NOTE: the timing recorded below was measured with the earlier implementation
# that re-scanned the whole data frame for every cell.
timer4
##      user    system   elapsed 
## 38347.170  8873.016 47261.380
# Recompute person-years and life expectancy now that the remaining strata
# have been filled in.
#
# num_PY_int*: survivors of the interval contribute numYrsInt years each, and
#   those who die contribute Ave_Lived_by_Died * numYrsInt years each. For the
#   open-ended "85+ years" row the value is alive_at_start / Death_Rate.
#   NOTE(review): a zero Death_Rate in the 85+ row gives Inf or NaN here.
# tot_PY_after*: within each stratum, running total of num_PY_int* from the
#   oldest age group downwards (hence the descending sort before cumsum).
# life_exp*: tot_PY_after* divided by the number alive at the interval start.
# The rows are put back into ascending age order at the end.
dat2 <- dat2 %>%
  mutate(
    num_PY_int1 = ifelse(
      Age2 == "85+ years",
      alive_at_start_int1 / Death_Rate1,
      (alive_at_start_int1 - deaths_during_int1) * numYrsInt +
        (deaths_during_int1 * Ave_Lived_by_Died * numYrsInt)
    ),
    num_PY_int5 = ifelse(
      Age2 == "85+ years",
      alive_at_start_int5 / Death_Rate5,
      (alive_at_start_int5 - deaths_during_int5) * numYrsInt +
        (deaths_during_int5 * Ave_Lived_by_Died * numYrsInt)
    ),
    num_PY_int9 = ifelse(
      Age2 == "85+ years",
      alive_at_start_int9 / Death_Rate9,
      (alive_at_start_int9 - deaths_during_int9) * numYrsInt +
        (deaths_during_int9 * Ave_Lived_by_Died * numYrsInt)
    ),
    num_PY_intb9 = ifelse(
      Age2 == "85+ years",
      alive_at_start_intb9 / Death_Rateb9,
      (alive_at_start_intb9 - deaths_during_intb9) * numYrsInt +
        (deaths_during_intb9 * Ave_Lived_by_Died * numYrsInt)
    ),
    num_PY_intw9 = ifelse(
      Age2 == "85+ years",
      alive_at_start_intw9 / Death_Ratew9,
      (alive_at_start_intw9 - deaths_during_intw9) * numYrsInt +
        (deaths_during_intw9 * Ave_Lived_by_Died * numYrsInt)
    )
  ) %>%
  group_by(StateYearRaceSex) %>%
  arrange(desc(Age2)) %>%
  mutate(
    tot_PY_after1 = cumsum(num_PY_int1),
    tot_PY_after5 = cumsum(num_PY_int5),
    tot_PY_after9 = cumsum(num_PY_int9),
    tot_PY_afterb9 = cumsum(num_PY_intb9),
    tot_PY_afterw9 = cumsum(num_PY_intw9),
    life_exp1 = tot_PY_after1 / alive_at_start_int1,
    life_exp5 = tot_PY_after5 / alive_at_start_int5,
    life_exp9 = tot_PY_after9 / alive_at_start_int9,
    life_expb9 = tot_PY_afterb9 / alive_at_start_intb9,
    life_expw9 = tot_PY_afterw9 / alive_at_start_intw9
  ) %>%
  group_by(StateYearRaceSex) %>%
  arrange(Age2)

# Checkpoint the whole workspace.
save.image(file = "/Users/corinneriddell/Dropbox/BlackWhiteGap/Data/Sep6_BWgap.Rdata")

I compared the following output against my calculations in the Excel spreadsheet as a check – all good.

# Print the life-table columns (variant 1) for the first stratum so they can
# be compared by eye against an independent calculation.
dat2[dat2$StateYearRaceSex == levels(dat2$StateYearRaceSex)[1], c("StateYearRaceSex", "Age2", "Population", "total_deaths1", 
                                                                       "Death_Rate1", "Ave_Lived_by_Died", "numYrsInt", "Prob_Dying1", "Prob_Surv1", 
                                                                       "alive_at_start_int1", "deaths_during_int1", "num_PY_int1", "tot_PY_after1", "life_exp1")]
## Source: local data frame [19 x 14]
## Groups: StateYearRaceSex [1]
## 
##           StateYearRaceSex        Age2 Population total_deaths1
##                     <fctr>       <ord>      <int>         <dbl>
## 1  Alabama.1969.White.Male     <1 year      22191           525
## 2  Alabama.1969.White.Male   1-4 years      82349            69
## 3  Alabama.1969.White.Male   5-9 years     123579            65
## 4  Alabama.1969.White.Male 10-14 years     127677            59
## 5  Alabama.1969.White.Male 15-19 years     116602           233
## 6  Alabama.1969.White.Male 20-24 years      97143           223
## 7  Alabama.1969.White.Male 25-29 years      85176           169
## 8  Alabama.1969.White.Male 30-34 years      72839           157
## 9  Alabama.1969.White.Male 35-39 years      71703           228
## 10 Alabama.1969.White.Male 40-44 years      75470           418
## 11 Alabama.1969.White.Male 45-49 years      73401           588
## 12 Alabama.1969.White.Male 50-54 years      66820           806
## 13 Alabama.1969.White.Male 55-59 years      60371          1193
## 14 Alabama.1969.White.Male 60-64 years      51539          1458
## 15 Alabama.1969.White.Male 65-69 years      37223          1536
## 16 Alabama.1969.White.Male 70-74 years      25854          1608
## 17 Alabama.1969.White.Male 75-79 years      17141          1608
## 18 Alabama.1969.White.Male 80-84 years       9865          1251
## 19 Alabama.1969.White.Male   85+ years       5500          1125
## # ... with 10 more variables: Death_Rate1 <dbl>, Ave_Lived_by_Died <dbl>,
## #   numYrsInt <dbl>, Prob_Dying1 <dbl>, Prob_Surv1 <dbl>,
## #   alive_at_start_int1 <dbl>, deaths_during_int1 <dbl>,
## #   num_PY_int1 <dbl>, tot_PY_after1 <dbl>, life_exp1 <dbl>
# Same life-table columns (variant 1) for the fourth stratum.
dat2[dat2$StateYearRaceSex == levels(dat2$StateYearRaceSex)[4], c("StateYearRaceSex", "Age2", "Population", "total_deaths1", 
                                                                       "Death_Rate1", "Ave_Lived_by_Died", "numYrsInt", "Prob_Dying1", "Prob_Surv1", 
                                                                       "alive_at_start_int1", "deaths_during_int1", "num_PY_int1", "tot_PY_after1", "life_exp1")]
## Source: local data frame [19 x 14]
## Groups: StateYearRaceSex [1]
## 
##            StateYearRaceSex        Age2 Population total_deaths1
##                      <fctr>       <ord>      <int>         <dbl>
## 1  Arkansas.1969.White.Male     <1 year      12562           250
## 2  Arkansas.1969.White.Male   1-4 years      47195            51
## 3  Arkansas.1969.White.Male   5-9 years      73748            29
## 4  Arkansas.1969.White.Male 10-14 years      74435            36
## 5  Arkansas.1969.White.Male 15-19 years      68811           119
## 6  Arkansas.1969.White.Male 20-24 years      53420           113
## 7  Arkansas.1969.White.Male 25-29 years      48584            85
## 8  Arkansas.1969.White.Male 30-34 years      40779            68
## 9  Arkansas.1969.White.Male 35-39 years      39188            93
## 10 Arkansas.1969.White.Male 40-44 years      41981           209
## 11 Arkansas.1969.White.Male 45-49 years      43188           342
## 12 Arkansas.1969.White.Male 50-54 years      42253           495
## 13 Arkansas.1969.White.Male 55-59 years      41026           772
## 14 Arkansas.1969.White.Male 60-64 years      37760           961
## 15 Arkansas.1969.White.Male 65-69 years      30445          1169
## 16 Arkansas.1969.White.Male 70-74 years      22426          1289
## 17 Arkansas.1969.White.Male 75-79 years      16337          1360
## 18 Arkansas.1969.White.Male 80-84 years       9053          1070
## 19 Arkansas.1969.White.Male   85+ years       5376          1074
## # ... with 10 more variables: Death_Rate1 <dbl>, Ave_Lived_by_Died <dbl>,
## #   numYrsInt <dbl>, Prob_Dying1 <dbl>, Prob_Surv1 <dbl>,
## #   alive_at_start_int1 <dbl>, deaths_during_int1 <dbl>,
## #   num_PY_int1 <dbl>, tot_PY_after1 <dbl>, life_exp1 <dbl>
# Same life-table columns (variant 1) for stratum 2789, a stratum with very
# small population counts and almost no recorded deaths (see output below).
dat2[dat2$StateYearRaceSex == levels(dat2$StateYearRaceSex)[2789], c("StateYearRaceSex", "Age2", "Population", "total_deaths1", 
                                                                       "Death_Rate1", "Ave_Lived_by_Died", "numYrsInt", "Prob_Dying1", "Prob_Surv1", 
                                                                       "alive_at_start_int1", "deaths_during_int1", "num_PY_int1", "tot_PY_after1", "life_exp1")]
## Source: local data frame [19 x 14]
## Groups: StateYearRaceSex [1]
## 
##                StateYearRaceSex        Age2 Population total_deaths1
##                          <fctr>       <ord>      <int>         <dbl>
## 1  North Dakota.1978.Black.Male     <1 year         41             0
## 2  North Dakota.1978.Black.Male   1-4 years        132             0
## 3  North Dakota.1978.Black.Male   5-9 years        141             0
## 4  North Dakota.1978.Black.Male 10-14 years         85             0
## 5  North Dakota.1978.Black.Male 15-19 years        193             0
## 6  North Dakota.1978.Black.Male 20-24 years        522             0
## 7  North Dakota.1978.Black.Male 25-29 years        241             0
## 8  North Dakota.1978.Black.Male 30-34 years        126             0
## 9  North Dakota.1978.Black.Male 35-39 years        163             0
## 10 North Dakota.1978.Black.Male 40-44 years         71             1
## 11 North Dakota.1978.Black.Male 45-49 years         16             0
## 12 North Dakota.1978.Black.Male 50-54 years          6             0
## 13 North Dakota.1978.Black.Male 55-59 years          0             0
## 14 North Dakota.1978.Black.Male 60-64 years          5             0
## 15 North Dakota.1978.Black.Male 65-69 years          2             0
## 16 North Dakota.1978.Black.Male 70-74 years          5             0
## 17 North Dakota.1978.Black.Male 75-79 years          1             0
## 18 North Dakota.1978.Black.Male 80-84 years          1             0
## 19 North Dakota.1978.Black.Male   85+ years          2             0
## # ... with 10 more variables: Death_Rate1 <dbl>, Ave_Lived_by_Died <dbl>,
## #   numYrsInt <dbl>, Prob_Dying1 <dbl>, Prob_Surv1 <dbl>,
## #   alive_at_start_int1 <dbl>, deaths_during_int1 <dbl>,
## #   num_PY_int1 <dbl>, tot_PY_after1 <dbl>, life_exp1 <dbl>

Initial graphs on missing data

Notes on Figure 1 (below):

Figure 1: Proportion missing by State (panel) and Age group (x-axis), ordered by increasing size of black population in 2013

Notes on Figure 2 (below):

Figure 2: Proportion zero by State (panel) and Age group (x-axis), ordered by increasing size of black population in 2013

Other plots (less interesting/informative)

How is missingness related to age, race, and sex?

Let’s look at the proportion with 0’s

Missing or zero (which is equivalent to < 10)

# Save every object in the current workspace to the project's .Rdata file
# (same file as the earlier checkpoints, so it is overwritten).
save.image(file = "/Users/corinneriddell/Dropbox/BlackWhiteGap/Data/Sep6_BWgap.Rdata")